This analysis builds on our previous work on status measurement and clustering. We will focus on three main objectives:
First, we’ll load our integrated status dataset and any external datasets for validation:
# Load the integrated status dimensions data
integrated_status <- read.csv("integrated_status_dimensions.csv", stringsAsFactors = FALSE)
# Load external validation datasets if available
# For example: CINC scores, GDP data, etc.
# Example (uncomment and modify as needed):
# external_data <- read.csv("external_status_measures.csv", stringsAsFactors = FALSE)
# Preview the data
glimpse(integrated_status)## Rows: 1,505
## Columns: 29
## $ country <chr> "Czechoslovakia", "Egypt", "France", "Ind…
## $ recognition_count <int> 39, 58, 82, 55, 33, 36, 73, 58, 32, 23, 4…
## $ weighted_recognition <dbl> 49.625, 60.250, 75.500, 43.750, 31.750, 3…
## $ eigenvector_centrality <dbl> 0.52822030, 0.71045720, 1.00000000, 0.660…
## $ pagerank <dbl> 0.012528771, 0.019578976, 0.037892397, 0.…
## $ authority <dbl> 0.54672243, 0.72487574, 1.00000000, 0.665…
## $ betweenness <dbl> 0.0244847825, 0.0001410069, 0.0260780360,…
## $ recognition_rate <dbl> 0.39393939, 0.58585859, 0.82828283, 0.555…
## $ network_inconsistency <dbl> 0.2902262, 0.8616718, 1.2127113, 0.540182…
## $ outgoing_ties <int> 59, 61, 83, 44, 34, 32, 78, 60, 44, 25, 5…
## $ recognition_balance <int> -20, -3, -1, 11, -1, 4, -5, -2, -12, -2, …
## $ recognition_ratio <dbl> 0.6610169, 0.9508197, 0.9879518, 1.250000…
## $ recognition_status_pca <dbl> 0.5300235, 0.7343870, 1.0000000, 0.641582…
## $ prestige_status_pca <dbl> 0.43957121, 0.62576816, 1.00000000, 0.604…
## $ brokerage_status_pca <dbl> 0.38788572, 0.32058496, 0.63613453, 0.604…
## $ overall_status_network_pca <dbl> 0.5032924, 0.6427105, 1.0000000, 0.656207…
## $ year <int> 1960, 1960, 1960, 1960, 1960, 1960, 1960,…
## $ external_internal_ratio <dbl> 0.6610169, 0.9508197, 0.9879518, 1.250000…
## $ attribute_status_pca <dbl> -1.47260670, -1.59817226, 2.71194106, 0.2…
## $ attribute_status_pca_z <dbl> -0.642036308, -0.696781170, 1.182369075, …
## $ overall_status_network_pca_z <dbl> 0.98399426, 1.66919367, 3.42516686, 1.735…
## $ combined_status_score <dbl> 0.17097897, 0.48620625, 2.30376797, 0.926…
## $ attribute_status_quartile <int> 2, 2, 4, 4, 2, 2, 4, 4, 2, 1, 4, 4, 4, 1,…
## $ network_status_quartile <int> 4, 4, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 4, 3,…
## $ combined_status_quartile <int> 3, 4, 4, 4, 3, 3, 4, 4, 3, 2, 4, 4, 4, 2,…
## $ status_type <chr> "Upper-Middle Status", "High Status", "Hi…
## $ status_inconsistency <dbl> 1.6260306, 2.3659748, 2.2427978, 1.617973…
## $ cluster <int> 3, 3, 4, 3, 3, 3, 4, 4, 3, 1, 3, 4, 4, 1,…
## $ hc_cluster <int> 1, 2, 3, 2, 1, 1, 2, 2, 1, 4, 1, 3, 3, 4,…
We’ll compare our status measures with established external indicators to assess validity:
# If you have external data, merge it with the integrated_status data
# Example:
# validation_data <- integrated_status %>%
# left_join(external_data, by = c("country", "year"))
# For this example, we'll use CINC as an external measure (if available in your data)
# If CINC is not available in your dataset, replace with another relevant measure
# Check if CINC is available in the data
if("cinc" %in% colnames(integrated_status)) {
# Correlation analysis
cor_data <- integrated_status %>%
select(combined_status_score, attribute_status_pca, overall_status_network_pca, cinc) %>%
na.omit()
# Correlation matrix
cor_matrix <- cor(cor_data)
# Visualize correlation matrix
corrplot(cor_matrix, method = "color", type = "upper",
addCoef.col = "black", tl.col = "black", tl.srt = 45,
title = "Correlation between Status Measures and CINC")
# Scatter plots
p1 <- ggplot(cor_data, aes(x = cinc, y = combined_status_score)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(x = "CINC Score", y = "Combined Status Score",
title = "Combined Status vs. CINC") +
theme_minimal()
p2 <- ggplot(cor_data, aes(x = cinc, y = attribute_status_pca)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(x = "CINC Score", y = "Attribute Status",
title = "Attribute Status vs. CINC") +
theme_minimal()
p3 <- ggplot(cor_data, aes(x = cinc, y = overall_status_network_pca)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = TRUE) +
labs(x = "CINC Score", y = "Network Status",
title = "Network Status vs. CINC") +
theme_minimal()
grid.arrange(p1, p2, p3, ncol = 3)
} else {
# If CINC is not available, provide instructions for modification
cat("CINC data not found in the dataset. Please modify this code to use available external measures for validation.")
# Alternative approach - create a sample external measure for demonstration
# This is just for illustration - replace with actual external data
integrated_status <- integrated_status %>%
mutate(
sample_external_measure = scale(attribute_status_pca + rnorm(n(), sd = 0.5))[,1]
)
# Correlation with sample external measure
cor_data <- integrated_status %>%
select(combined_status_score, attribute_status_pca, overall_status_network_pca, sample_external_measure) %>%
na.omit()
# Correlation matrix
cor_matrix <- cor(cor_data)
# Visualize correlation matrix
corrplot(cor_matrix, method = "color", type = "upper",
addCoef.col = "black", tl.col = "black", tl.srt = 45,
title = "Correlation between Status Measures and Sample External Measure")
}## CINC data not found in the dataset. Please modify this code to use available external measures for validation.
We’ll select a few countries with interesting status trajectories for in-depth case studies:
# Identify countries with interesting status profiles or changes
# 1. Countries with large status changes over time
status_change <- integrated_status %>%
select(country, year, combined_status_score) %>%
arrange(country, year) %>%
group_by(country) %>%
mutate(
next_score = lead(combined_status_score),
score_change = next_score - combined_status_score
) %>%
filter(!is.na(score_change)) %>%
ungroup()
# Top 5 countries with largest positive change
top_risers <- status_change %>%
group_by(country) %>%
summarise(total_change = sum(score_change, na.rm = TRUE)) %>%
arrange(desc(total_change)) %>%
head(5)
# Top 5 countries with largest negative change
top_decliners <- status_change %>%
group_by(country) %>%
summarise(total_change = sum(score_change, na.rm = TRUE)) %>%
arrange(total_change) %>%
head(5)
# 2. Countries with status inconsistency (high attribute, low network or vice versa)
status_inconsistency <- integrated_status %>%
group_by(country) %>%
summarise(
mean_inconsistency = mean(status_inconsistency, na.rm = TRUE),
n_obs = n()
) %>%
filter(n_obs >= 3) %>% # At least 3 observations
arrange(desc(mean_inconsistency)) %>%
head(5)
# Display the interesting countries
interesting_countries <- c(
top_risers$country,
top_decliners$country,
status_inconsistency$country
)
# Create a data frame with just these countries
case_study_data <- integrated_status %>%
filter(country %in% interesting_countries)
# Plot the status trajectories of case study countries
ggplot(case_study_data, aes(x = year, y = combined_status_score, color = country, group = country)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(x = "Year", y = "Combined Status Score",
title = "Status Trajectories of Case Study Countries") +
theme_minimal() +
theme(legend.position = "right")# Create individual plots for top status risers
for(country_name in top_risers$country[1:3]) { # First 3 top risers
country_data <- integrated_status %>%
filter(country == country_name)
p <- ggplot(country_data, aes(x = year)) +
geom_line(aes(y = attribute_status_pca, color = "Attribute Status"), size = 1) +
geom_line(aes(y = overall_status_network_pca, color = "Network Status"), size = 1) +
geom_line(aes(y = combined_status_score, color = "Combined Status"), size = 1, linetype = "dashed") +
labs(x = "Year", y = "Status Scores", color = "Status Dimension",
title = paste("Status Trajectory for", country_name)) +
theme_minimal()
print(p)
}# Create individual plots for countries with status inconsistency
for(country_name in status_inconsistency$country[1:3]) { # First 3 inconsistent countries
country_data <- integrated_status %>%
filter(country == country_name)
p <- ggplot(country_data, aes(x = year)) +
geom_line(aes(y = attribute_status_pca, color = "Attribute Status"), size = 1) +
geom_line(aes(y = overall_status_network_pca, color = "Network Status"), size = 1) +
geom_line(aes(y = status_inconsistency, color = "Status Inconsistency"), size = 1, linetype = "dotted") +
labs(x = "Year", y = "Status Scores", color = "Status Dimension",
title = paste("Status Inconsistency for", country_name)) +
theme_minimal()
print(p)
}Create interactive visualizations to explore status dimensions:
if(!"ir_category" %in% colnames(integrated_status)) {
# Create IR categories based on quartiles of combined status score
integrated_status <- integrated_status %>%
group_by(year) %>%
mutate(
status_quartile = ntile(combined_status_score, 4),
ir_category = case_when(
status_quartile == 4 ~ "Great Power",
status_quartile == 3 ~ "Major Power",
status_quartile == 2 ~ "Middle Power",
status_quartile == 1 ~ "Minor Power"
)
) %>%
ungroup()
}
# Create interactive scatter plot with plotly
interactive_plot <- integrated_status %>%
filter(year == max(year)) %>% # Most recent year
plot_ly(
x = ~attribute_status_pca,
y = ~overall_status_network_pca,
color = ~ir_category,
size = ~status_inconsistency,
text = ~paste("Country:", country,
"<br>Attribute Status:", round(attribute_status_pca, 2),
"<br>Network Status:", round(overall_status_network_pca, 2),
"<br>Combined Score:", round(combined_status_score, 2),
"<br>IR Category:", ir_category),
type = "scatter",
mode = "markers"
) %>%
layout(
title = paste("Interactive Status Space -", max(integrated_status$year)),
xaxis = list(title = "Attribute-Based Status"),
yaxis = list(title = "Network-Based Status"),
hovermode = "closest"
)
# Display the interactive plot
interactive_plot# Create a 3D visualization using t-SNE for dimensionality reduction
# This allows visualizing multiple status dimensions simultaneously
if(length(unique(integrated_status$year)) >= 3) {
# Select a subset of years for visualization
years_to_viz <- sort(unique(integrated_status$year), decreasing = TRUE)[1:3]
tsne_data <- integrated_status %>%
filter(year %in% years_to_viz) %>%
select(country, year, attribute_status_pca, overall_status_network_pca,
combined_status_score, status_inconsistency, ir_category)
# Run t-SNE on the status variables
set.seed(123) # For reproducibility
tsne_result <- tsne(tsne_data %>%
select(attribute_status_pca, overall_status_network_pca,
combined_status_score, status_inconsistency) %>%
as.matrix())
# Add t-SNE coordinates to the data
tsne_data$tsne1 <- tsne_result[,1]
tsne_data$tsne2 <- tsne_result[,2]
# Create interactive t-SNE plot
tsne_plot <- plot_ly(
data = tsne_data,
x = ~tsne1,
y = ~tsne2,
color = ~ir_category,
symbol = ~factor(year),
text = ~paste("Country:", country,
"<br>Year:", year,
"<br>IR Category:", ir_category,
"<br>Combined Score:", round(combined_status_score, 2)),
type = "scatter",
mode = "markers"
) %>%
layout(
title = "t-SNE Visualization of Status Dimensions",
xaxis = list(title = "t-SNE Dimension 1"),
yaxis = list(title = "t-SNE Dimension 2"),
hovermode = "closest"
)
# Display the t-SNE plot
tsne_plot
}# Create interactive time series visualization of status evolution
# Select a few prominent countries for visualization
prominent_countries <- c(
"United States", "USSR", "Russia", "China",
"United Kingdom", "France", "Germany", "Japan",
"India", "Brazil"
)
# Filter for countries that exist in our dataset
available_countries <- prominent_countries[prominent_countries %in% unique(integrated_status$country)]
if(length(available_countries) > 0) {
time_series_data <- integrated_status %>%
filter(country %in% available_countries)
# Create interactive time series plot
time_series_plot <- plot_ly() %>%
layout(
title = "Status Evolution of Major Powers",
xaxis = list(title = "Year"),
yaxis = list(title = "Combined Status Score"),
hovermode = "closest"
)
# Add a line for each country
for(country_name in available_countries) {
country_data <- time_series_data %>%
filter(country == country_name)
time_series_plot <- time_series_plot %>%
add_trace(
data = country_data,
x = ~year,
y = ~combined_status_score,
name = country_name,
type = "scatter",
mode = "lines+markers",
text = ~paste("Country:", country,
"<br>Year:", year,
"<br>Combined Score:", round(combined_status_score, 2),
"<br>IR Category:", ir_category)
)
}
# Display the time series plot
time_series_plot
}Test the relationship between attribute-based and network-based status measures:
# Linear regression of network status on attribute status
lm_model <- lm(overall_status_network_pca ~ attribute_status_pca, data = integrated_status)
# Display regression summary
summary(lm_model)##
## Call:
## lm(formula = overall_status_network_pca ~ attribute_status_pca,
## data = integrated_status)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54601 -0.11676 -0.01139 0.09902 0.53879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.303078 0.003954 76.65 <2e-16 ***
## attribute_status_pca 0.058309 0.001724 33.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1534 on 1503 degrees of freedom
## Multiple R-squared: 0.432, Adjusted R-squared: 0.4317
## F-statistic: 1143 on 1 and 1503 DF, p-value: < 2.2e-16
# Plot the relationship with regression line
ggplot(integrated_status, aes(x = attribute_status_pca, y = overall_status_network_pca)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", se = TRUE) +
labs(x = "Attribute-Based Status", y = "Network-Based Status",
title = "Relationship Between Attribute and Network Status") +
theme_minimal()# Check for non-linear relationships using a GAM
if(require(mgcv)) {
gam_model <- gam(overall_status_network_pca ~ s(attribute_status_pca), data = integrated_status)
# Summary of GAM model
summary(gam_model)
# Plot the GAM fit
ggplot(integrated_status, aes(x = attribute_status_pca, y = overall_status_network_pca)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "gam", formula = y ~ s(x), se = TRUE) +
labs(x = "Attribute-Based Status", y = "Network-Based Status",
title = "Non-linear Relationship Between Attribute and Network Status") +
theme_minimal()
# Compare linear and non-linear models
AIC(lm_model, gam_model)
}## df AIC
## lm_model 3.00000 -1367.998
## gam_model 10.04312 -1486.076
# Test for relationship by year to see if it changes over time
# We'll run separate regressions for each year and compare coefficients
year_models <- integrated_status %>%
nest_by(year) %>%
mutate(
model = list(lm(overall_status_network_pca ~ attribute_status_pca, data = data)),
tidied = list(tidy(model))
) %>%
unnest(tidied) %>%
filter(term == "attribute_status_pca") %>%
select(year, estimate, std.error, p.value)
# Display coefficient changes over time
ggplot(year_models, aes(x = year, y = estimate)) +
geom_point() +
geom_errorbar(aes(ymin = estimate - 1.96*std.error,
ymax = estimate + 1.96*std.error), width = 0.2) +
geom_line() +
labs(x = "Year", y = "Coefficient Estimate",
title = "Relationship Between Attribute and Network Status Over Time") +
theme_minimal()Conduct ANOVA to compare status across different groups:
# ANOVA to compare attribute status across IR categories
anova_attribute <- aov(attribute_status_pca ~ ir_category, data = integrated_status)
summary(anova_attribute)## Df Sum Sq Mean Sq F value Pr(>F)
## ir_category 3 3633 1211.0 424.7 <2e-16 ***
## Residuals 1501 4279 2.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA to compare network status across IR categories
anova_network <- aov(overall_status_network_pca ~ ir_category, data = integrated_status)
summary(anova_network)## Df Sum Sq Mean Sq F value Pr(>F)
## ir_category 3 47.23 15.74 1572 <2e-16 ***
## Residuals 1501 15.04 0.01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA to compare combined status across IR categories
anova_combined <- aov(combined_status_score ~ ir_category, data = integrated_status)
summary(anova_combined)## Df Sum Sq Mean Sq F value Pr(>F)
## ir_category 3 895.7 298.55 1278 <2e-16 ***
## Residuals 1501 350.6 0.23
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Post-hoc tests for combined status
if(require(emmeans)) {
posthoc <- emmeans(anova_combined, pairwise ~ ir_category)
summary(posthoc)
} else {
# Alternative using TukeyHSD
tukey_results <- TukeyHSD(anova_combined)
print(tukey_results)
}## $emmeans
## ir_category emmean SE df lower.CL upper.CL
## Great Power 1.2075 0.0251 1501 1.1583 1.257
## Major Power 0.0912 0.0249 1501 0.0423 0.140
## Middle Power -0.3928 0.0249 1501 -0.4415 -0.344
## Minor Power -0.8785 0.0248 1501 -0.9271 -0.830
##
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## Great Power - Major Power 1.116 0.0354 1501 31.562 <.0001
## Great Power - Middle Power 1.600 0.0353 1501 45.306 <.0001
## Great Power - Minor Power 2.086 0.0353 1501 59.135 <.0001
## Major Power - Middle Power 0.484 0.0352 1501 13.748 <.0001
## Major Power - Minor Power 0.970 0.0352 1501 27.583 <.0001
## Middle Power - Minor Power 0.486 0.0351 1501 13.835 <.0001
##
## P value adjustment: tukey method for comparing a family of 4 estimates
# Visualize status differences across IR categories
ggplot(integrated_status, aes(x = ir_category, y = combined_status_score, fill = ir_category)) +
geom_boxplot() +
labs(x = "IR Category", y = "Combined Status Score",
title = "Status Score Differences Across IR Categories") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# Compare status inconsistency across IR categories
anova_inconsistency <- aov(status_inconsistency ~ ir_category, data = integrated_status)
summary(anova_inconsistency)## Df Sum Sq Mean Sq F value Pr(>F)
## ir_category 3 13.6 4.547 18.03 1.71e-11 ***
## Residuals 1501 378.6 0.252
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Visualize status inconsistency across IR categories
ggplot(integrated_status, aes(x = ir_category, y = status_inconsistency, fill = ir_category)) +
geom_boxplot() +
labs(x = "IR Category", y = "Status Inconsistency",
title = "Status Inconsistency Across IR Categories") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))Analyze the factors that contribute to status consistency/inconsistency:
# Regression model to predict status inconsistency
consistency_model <- lm(status_inconsistency ~ attribute_status_pca +
overall_status_network_pca +
factor(year),
data = integrated_status)
# Display model summary
summary(consistency_model)##
## Call:
## lm(formula = status_inconsistency ~ attribute_status_pca + overall_status_network_pca +
## factor(year), data = integrated_status)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1380 -0.3580 -0.0790 0.2812 4.0775
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.782794 0.067599 11.580 < 2e-16 ***
## attribute_status_pca 0.028459 0.009111 3.124 0.001821 **
## overall_status_network_pca 0.362602 0.093169 3.892 0.000104 ***
## factor(year)1965 -0.222506 0.070277 -3.166 0.001576 **
## factor(year)1970 -0.225982 0.068832 -3.283 0.001050 **
## factor(year)1975 -0.249145 0.067786 -3.675 0.000246 ***
## factor(year)1980 -0.257665 0.067171 -3.836 0.000130 ***
## factor(year)1985 -0.356704 0.067819 -5.260 1.65e-07 ***
## factor(year)1990 -0.384170 0.068655 -5.596 2.61e-08 ***
## factor(year)1995 -0.292182 0.068837 -4.245 2.33e-05 ***
## factor(year)2000 -0.273767 0.069796 -3.922 9.17e-05 ***
## factor(year)2005 -0.160104 0.070881 -2.259 0.024041 *
## factor(year)2010 -0.150788 0.070820 -2.129 0.033404 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4874 on 1492 degrees of freedom
## Multiple R-squared: 0.09641, Adjusted R-squared: 0.08915
## F-statistic: 13.27 on 12 and 1492 DF, p-value: < 2.2e-16
# Visualize status inconsistency by year
ggplot(integrated_status, aes(x = factor(year), y = status_inconsistency)) +
stat_summary(fun = mean, geom = "bar", fill = "skyblue") +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
labs(x = "Year", y = "Mean Status Inconsistency",
title = "Status Inconsistency Over Time") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))# Identify countries with largest inconsistencies over time
top_inconsistent <- integrated_status %>%
group_by(country) %>%
summarise(
mean_inconsistency = mean(status_inconsistency, na.rm = TRUE),
n_years = n()
) %>%
filter(n_years >= 3) %>% # At least 3 observations
arrange(desc(mean_inconsistency)) %>%
head(10)
# Display top inconsistent countries
top_inconsistent %>%
kable("html", caption = "Countries with Highest Status Inconsistency") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| country | mean_inconsistency | n_years |
|---|---|---|
| Yugoslavia | 2.043225 | 7 |
| Luxembourg | 1.687992 | 11 |
| Czechoslovakia | 1.680845 | 7 |
| Cuba | 1.661572 | 11 |
| Northern Cyprus, Turkish Republic of | 1.631904 | 6 |
| Egypt | 1.593889 | 11 |
| United States | 1.545607 | 11 |
| China | 1.478240 | 11 |
| German Democratic Republic | 1.424568 | 6 |
| Ethiopia | 1.222657 | 7 |